{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\SusanLi\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\sklearn\\ensemble\\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n",
      "  from numpy.core.umath_tests import inner1d\n"
     ]
    }
   ],
   "source": [
    "import datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.pipeline import make_pipeline\n",
    "from sklearn import preprocessing\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn import svm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Specifying dtypes helps reduce the memory required when reading in the CSV file later."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column-name -> dtype mapping for the training data columns.\n",
    "# This cell only defines the mapping: it reads no file and does not pass the\n",
    "# mapping to pd.read_csv. Previously only the first line of this literal was\n",
    "# commented out, so the remaining lines raised an IndentationError.\n",
    "data_type = {\n",
    "    'is_booking': bool, 'srch_ci': np.str_, 'srch_co': np.str_,\n",
    "    'srch_adults_cnt': np.int32, 'srch_children_cnt': np.int32,\n",
    "    'srch_rm_cnt': np.int32, 'srch_destination_id': np.int32,\n",
    "    'user_location_country': np.int32, 'user_location_region': np.int32,\n",
    "    'user_location_city': np.int32, 'hotel_cluster': np.int32,\n",
    "    'orig_destination_distance': np.float64, 'date_time': np.str_,\n",
    "    'hotel_market': np.int32,\n",
    "}\n",
    "# Smaller alternative mapping, left disabled:\n",
    "# d_type={'is_booking':bool, 'cnt':np.int32, 'hotel_cluster' : np.int32,'srch_destination_id':np.int32}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To be able to process the data locally, we drop rows with missing values and then randomly sample 1% of the remaining records. Even after that, we still have a large number of records: 241,179."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(241179, 24)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the training data, drop every row that has a missing value, then keep\n",
    "# a 1% random sample; the fixed random_state makes the sample repeatable.\n",
    "df = (\n",
    "    pd.read_csv('train.csv.gz', sep=',')\n",
    "    .dropna()\n",
    "    .sample(frac=0.01, random_state=99)\n",
    ")\n",
    "# The destinations table is loaded in full.\n",
    "dest = pd.read_csv('destinations.csv.gz')\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_time</th>\n",
       "      <th>site_name</th>\n",
       "      <th>posa_continent</th>\n",
       "      <th>user_location_country</th>\n",
       "      <th>user_location_region</th>\n",
       "      <th>user_location_city</th>\n",
       "      <th>orig_destination_distance</th>\n",
       "      <th>user_id</th>\n",
       "      <th>is_mobile</th>\n",
       "      <th>is_package</th>\n",
       "      <th>...</th>\n",
       "      <th>srch_children_cnt</th>\n",
       "      <th>srch_rm_cnt</th>\n",
       "      <th>srch_destination_id</th>\n",
       "      <th>srch_destination_type_id</th>\n",
       "      <th>is_booking</th>\n",
       "      <th>cnt</th>\n",
       "      <th>hotel_continent</th>\n",
       "      <th>hotel_country</th>\n",
       "      <th>hotel_market</th>\n",
       "      <th>hotel_cluster</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>32352134</th>\n",
       "      <td>2014-05-22 11:40:07</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>174</td>\n",
       "      <td>24103</td>\n",
       "      <td>2323.5232</td>\n",
       "      <td>802499</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1442</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>125</td>\n",
       "      <td>177</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29796021</th>\n",
       "      <td>2013-06-29 12:24:37</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>311</td>\n",
       "      <td>25538</td>\n",
       "      <td>2288.6121</td>\n",
       "      <td>85229</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8272</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>659</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15185156</th>\n",
       "      <td>2014-10-30 13:58:32</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>294</td>\n",
       "      <td>40046</td>\n",
       "      <td>587.6970</td>\n",
       "      <td>755217</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11321</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>642</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3301948</th>\n",
       "      <td>2014-08-22 20:14:34</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>332</td>\n",
       "      <td>55121</td>\n",
       "      <td>2234.4394</td>\n",
       "      <td>160733</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1152</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>47</td>\n",
       "      <td>1502</td>\n",
       "      <td>65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25429119</th>\n",
       "      <td>2014-03-25 18:47:43</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>314</td>\n",
       "      <td>47869</td>\n",
       "      <td>839.0087</td>\n",
       "      <td>1078493</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>8284</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>685</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    date_time  site_name  posa_continent  \\\n",
       "32352134  2014-05-22 11:40:07          2               3   \n",
       "29796021  2013-06-29 12:24:37          2               3   \n",
       "15185156  2014-10-30 13:58:32          2               3   \n",
       "3301948   2014-08-22 20:14:34          2               3   \n",
       "25429119  2014-03-25 18:47:43          2               3   \n",
       "\n",
       "          user_location_country  user_location_region  user_location_city  \\\n",
       "32352134                     66                   174               24103   \n",
       "29796021                     66                   311               25538   \n",
       "15185156                     66                   294               40046   \n",
       "3301948                      66                   332               55121   \n",
       "25429119                     66                   314               47869   \n",
       "\n",
       "          orig_destination_distance  user_id  is_mobile  is_package  \\\n",
       "32352134                  2323.5232   802499          0           1   \n",
       "29796021                  2288.6121    85229          0           0   \n",
       "15185156                   587.6970   755217          0           1   \n",
       "3301948                   2234.4394   160733          0           1   \n",
       "25429119                   839.0087  1078493          0           0   \n",
       "\n",
       "              ...        srch_children_cnt srch_rm_cnt srch_destination_id  \\\n",
       "32352134      ...                        0           1                1442   \n",
       "29796021      ...                        1           1                8272   \n",
       "15185156      ...                        0           1               11321   \n",
       "3301948       ...                        0           1                1152   \n",
       "25429119      ...                        0           1                8284   \n",
       "\n",
       "          srch_destination_type_id  is_booking  cnt  hotel_continent  \\\n",
       "32352134                         3           0    1                4   \n",
       "29796021                         1           0    1                2   \n",
       "15185156                         1           0    1                2   \n",
       "3301948                          1           1    1                4   \n",
       "25429119                         1           0    4                2   \n",
       "\n",
       "          hotel_country  hotel_market  hotel_cluster  \n",
       "32352134            125           177             44  \n",
       "29796021             50           659             59  \n",
       "15185156             50           642             22  \n",
       "3301948              47          1502             65  \n",
       "25429119             50           685              6  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### EDA\n",
    "\n",
    "#### What are we predicting?\n",
    "The objective is to predict which hotel_cluster a user will book, given the information in their search. There are 100 clusters in total. In other words, we are dealing with a 100-class classification problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\SusanLi\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1706: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
      "  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x19d9a495a90>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtMAAAF3CAYAAABnkcdUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd43FeZ9vHvGY16771Yllzk3h07TnN6QhIgPZAAIWGBwC4ssOy+S1kueHezBVgg8FJCIAlphBAc0ntc4l7kbkuyeu+9znn/kBIcx0UaaTQz0v25Ll2WRr+ZecaWNfeceX7PMdZaRERERERk7BzeLkBERERExF8pTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3Ob1dwFgkJCTYnJwcb5chIiIiIlPYrl27Gq21iaM51q/CdE5ODjt37vR2GSIiIiIyhRljykZ7rNo8RERERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE1ObxcgIiIi4use21Y+6mNvX5XlwUrE12hlWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNw0qjBtjLnSGHPUGFNkjPnmab4fbIx5cuT724wxOSOXX2aM2WWM2T/y5yUnXWfZyOVFxpifGGPMRD0oEREREZHJcM4wbYwJAB4ArgIKgNuMMQWnHHY30GKtzQN+BNw/cnkj8BFr7QLgLuCRk67zC+BeIH/k48pxPA4RERERkUk3mpXplUCRtbbEWtsPPAFcf8ox1wO/H/n8aWC9McZYa/dYa6tHLj8IhIysYqcCUdbad621FngYuGHcj0ZEREREZBKNJkynAxUnfV05ctlpj7HWDgJtQPwpx3wc2GOt7Rs5vvIctwmAMeZeY8xOY8zOhoaGUZQrIiIiIjI5RhOmT9fLbMdyjDFmHsOtH58bw20OX2jtr6y1y621yxMTE0dRroiIiIjI5BhNmK4EMk/6OgOoPtMxxhgnEA00j3ydAfwZuNNaW3zS8RnnuE0REREREZ82mjC9A8g3xswwxgQBtwIbTjlmA8MnGALcCLxhrbXGmBjgeeCfrbWb3zvYWlsDdBhjVo9M8bgT+Ms4H4uIiIiIyKQ6Z5ge6YG+D3gZOAw8Za09aIz5njHmupHDHgTijTFFwFeB98bn3QfkAd8yxuwd+Uga+d7ngd8ARUAx8OJEPSgRERERkcngHM1B1toXgBdOuezbJ33eC9x0mut9H/j+GW5zJzB/LMWKiIiIiPgS7YAoIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi
4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETc5PR2ASIiIt722LbyUR97+6osD1YiIv5GK9MiIiIiIm7SyrSIyElGu0Kp1UkREQGtTIuIiIiIuE1hWkRERETETQrTIiIiIiJuUpgWEREREXGTwrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJoVpERERERE3KUyLiIiIiLhJYVpERERExE0K0yIiIiIiblKYFhERERFxk8K0iIiIiIibFKZFRERERNykMC0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImp7cLEBGRsXtsW/moj719VZYHKxERmd60Mi0iIiIi4iaFaRERERERNylMi4iIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcNKowbYy50hhz1BhTZIz55mm+H2yMeXLk+9uMMTkjl8cbY940xnQaY352ynXeGrnNvSMfSRPxgEREREREJss5d0A0xgQADwCXAZXADmPMBmvtoZMOuxtosdbmGWNuBe4HbgF6gW8B80c+TnWHtXbnOB+DiIiIiIhXjGZleiVQZK0tsdb2A08A159yzPXA70c+fxpYb4wx1toua+0mhkO1iIiIiMiUMpownQ5UnPR15chlpz3GWjsItAHxo7jth0ZaPL5ljDGnO8AYc68xZqcxZmdDQ8MoblJEREREZHKMJkyfLuRaN4451R3W2gXAupGPT57uIGvtr6y1y621yxMTE89ZrIiIiIjIZBlNmK4EMk/6OgOoPtMxxhgnEA00n+1GrbVVI392AI8x3E4iIiIiIuI3RhOmdwD5xpgZxpgg4FZgwynHbADuGvn8RuANa+0ZV6aNMU5jTMLI54HAtcCBsRYvIiIiIuJN55zmYa0dNMbcB7wMBAC/tdYeNMZ8D9hprd0APAg8YowpYnhF+tb3rm+MKQWigCBjzA3A5UAZ8PJIkA4AXgN+PaGPTERERETEw84ZpgGstS8AL5xy2bdP+rwXuOkM1805w80uG12JIiIiIiK+STsgioiIiIi4SWFaRERERMRNCtMiIiIiIm5SmBYRERERcZPCtIiIiIiImxSmRURERETcpDAtIiIiIuImhWkRERERETcpTIuIiIiIuElhWkRERETETQrTIiIiIiJuUpgWEREREXGT09sFiIiIZz22rXxUx92+KsvDlYiITD0K0yIiIiJTxGhfPINeQE8UtXmIiIiIiLhJYVpERERExE1q85gi9LaOiIiIyOTTyrSIiIiIiJsUpkVERERE3KQwLSIiIiLiJvVMi4yT+tVFRESmL61Mi4iIiIi4SWFaRERERMRNCtMiIiIiIm5Sz7SckXqBRUREPEfPs1ODVqZFRERERNykMC0iIiIi4ia1eYiInENTZx8vHKhlyOVibV4CeYkR3i7pfR29AxQ3dJEcFUxyVAgOY7xdkojItKIwLSJyBi5r2VLUyKuH63AYQ1CAg4c2l5IWHUJkaCBXz0/BGeC9N/gO17Tzp92VdPcPARDkdJARE0pmXBjz0qLIiA3zWm0iItOFwrSIyGnUtvfyzO5KKlt6mJMSyfWL0wkPCmBvRSvvHG/ky4/v4T9jQ/nPGxeyZmbCpNbWOzDEhn1VbC1pJjU6hNtXZdHeM0B5cw8Vzd1sPN7A5qJG7lmXS2acArWIN5U2drFhXzUb9lVT197LBfmJrJ+bxEWzk7xdmkwQhWkRkVPsKW/hmd1VBAc6uGVFJgvTozEj7RPLc+JYmh1LUmQw//HSET738C7+/MW15CVNTuvHkdp2vvz4Ho7VdXJ+XgKXFyS/vzq+ODMWgM6+QX7xVhGPbi3jCxfnER0aOCm1ibhjtBMt/GmaRe/AEI9uLWPDvmoKK9sAWDkjjsWZMbx9
rIHn99fgMJAZG8ay7FiWZseqRcuPKUyLiJyko3eADfuqyYgN5ROrswkP/vCvSYcxXD4vhYK0KG54YDOf/f0Onv3iWmLCgjxa27vFTdz10HaiQgL51JocZiVHnva4iGAnd56Xwy/eLubRrWXcsy6XIKfON59snhh7plFqvs9ay9efLuS5fdXMT4/iX66ew7UL00iLCQXA5bIcqG7j9cP1PLmjgmf2VLGzrIXrF6eRGh3q5erFHfrtKiJyklcO1jE4ZPnY0ozTBumTZcSG8ctPLqO6tZcv/GE3A0Muj9VV0dzNF/6wi6y4MF76h3VnDNLvSY4K4dblmVS39vCn3ZVYaz1Wm4j8zdYTzTy3r5qvXT6Lv35pHfdeMPP9IA3gcBgWZsTwlctm8aVL8rhxWQZNnX088GYRzxdW0zcw5MXqxR1amRY5g7GsAMnUsKushV3lLVyQn0hiZPCorrMsO45//9gC/vGP+/i35w7y/RsWTHhdnX2DfPb3OxlyWX5953ISIkZX25zUKK6Yl8JLB2tJjgrhkjnq0RTxpIrmbl4orOGSOUl84aK8cx5vjGFpVixzUiJ55WAdW4qb2F/Vxo3LMietdUzGTyvTIiLAkMvynQ0HiApxcvGcxDFd9+PLMvjchbk8urWch98tndC6XC7LV5/cy/H6Dh64YykzEsLHdP11+QksyYzhtcN1HKpum9DaRORvuvsGeXx7OZGhTn548yIcjtH3QIcFOblhSTqfu3AmIYEB/G7LCbadaPJgtTKRtDItIgI8saOcA1Xt3LIik2BnwDmPP/Wdi8zYMOakRPLdDQcpbez+wKrSeHpXf/z6cV45VMe3ri1gXf7YQj4Mr3zdsCSduo5eniusIT85kkAvjvMTmYpc1vLUrgo6+gb53AW5bp8/kRUXxucvnMkTOyr4y95qGjv6uGpBqk5O9HH6jSoi015LVz//9fJRVs2IY2F6tFu34TCGW5ZnkhgZzGPby2js6Bt3XS/sr+Enrx/npmUZfGZtjtu3Exjg4Kr5qbT1DLDtRPO46xKRD3rraD3H6jq5dmHquOe7BwcG8MnzslkzM57NxU08urVMfdQ+TmFaRKa9/3rlKB29g3zv+vnvj8BzR3BgAJ9cnYPDGB7eWkpPv/tPgLvKmvnqU3tZmhXD9z86vroAZiZGkJcUwVtH6/XELDKBatt6ef1wPYszY1iZEzcht+kwhmsXpnHdojSO1XXwy3dKaJiAF+jiGQrTIjKtHahq4/Ht5dx1Xg6zU84+IWM04sKDuGNVNi1dAzy+o5wh19inaByqbudTD+0gNTqUX35y+ajaTkbj8oJkuvuH2FTcOCG3JyKwqagRZ4Dh2oWp437Re6rVufHceV4OTV19fPnxPW79PhHPU5gWkWntF28XExHk5B8uy5+w25yREM71i9Moqu/khf01Y7ruicYu7vztdiKCnTxy98pRTxUZjYzY4W3GNx1vpKtvcMJuV2S66ugdYF9lK8uyYwkL8sxpaLOSI7l+UTrvljTxw1ePeuQ+ZHx0AqKITFtVrT28dKCWu8+fQVTIxO4SuDwnjvqOPjYVNfKHbWXcsSr7nNepaevhE7/ZhstaHrl79bh7L0/n0rnJHKpu551jDVy1IHXCb3+66eob5Hh9JwDvrUkaw5j/7TSK0z9tLWnG5bKsmZng0ftZmh2LM8DwwJvFLMuO5ZI5yR69PxkbhWkRmbZ+v6UUgLvW5Hjk9q+cn0J9Ry/fevYAh6rb+drls4kNP/1Z/s1d/Xzywe209Qzw+D2rPTZjNjkqhCVZMbxb0sSavARtNe6muvZefrellD9sLaO998Or/A4D2fHhFKRGMS8tyuO7Y8rkGxhyse1EE3NSo0Y9+308vnvdPAor2/jKk/v465fOJzNu4l9si3sUpkVkWuoamQl75fwU0mM8s4WvwxhuW5lFWVM3j2wt46+FNXzt8lncviqbAIfBWsvB6naeK6xmw95qmrv6+f1nVrIgw72JIqO1fk4y
+yraePNIPTcsSffofY3FaFdnvblNdl17L1//4z6e3VvFkMty5fwUrluUTpBzeF3aWnBZ2F/ZyhM7Knh+fw3P768hPSaUtXnxLMqImfC+WvGOPeWtdPcPsTYvflLu75ndVVw1P4UH3iri1l9t5XMX5OLUmEufoDAtItPSH3dW0NE7yN3nz/Do/QQ7A/judfO4bWUW391wkG/95SCPba/g4tmJvHSwlpKGLpwOw7r8BP7uwpmsyvX8E3NseBArZsSx/UQT6/ITiJ+EVbWpYOPxBl48UEtIoIPbVmZx9/kzyI4//SY6lxUkkxIdSmNHHwdr2tlX0cpTOyvZVtLMRxalfWB7afE/LmvZXNRIWkwIM87wM+AJ8RHB3Lg0g0e3lfP8/hquX+w7L4anM4VpEfkQf1ghHI8hl+WhLaUsyYphaVbspNzn7JRIHrtnFS8eqOUHzx/mF28Xs3pGPJ89P5er5qecsf3DUy6enciusmbeOtrAx5dlTOp9+6PNRY28eKCW+enRPPyZlcSN8t8rITKYCyMTWZefwO6yFl4+WMsDbxaxIieOywqSCQ/W07A/Ol7XQUNnHzcvz5j0dxoK0qI5Py+BTUWNLM2KVbuHD9D/YhGZdl4/XEdZUzdfv2L2pN6vMYarF6Ry6dxkevqHiA7zXr9yZEggy7Pj2HaiifVzk9TTexbvljTx/P4a5qVFccvyzFEH6ZM5jGF5Thzz0qJ5/UgdW0ua2F/Vxi0rMpmVPP6RjDK5NhU1EhXiZL6bmzyN1/o5Sewqa+HtYw18YvW5T24Wz1KYFpFp58FNJ0iPCeXKeSleuf8gp4Mg54d7HSd7osO6/AS2nWhiU1Ej1y5Mm9T79hfbTzTz3L5q5qZGceuKLAIc41uFDA0K4NqFaazIiePJHRU88m4ZNy3PYGFGzARVLJ5W09ZDcUMXV8xLwenwTs9ycGAAq3PjefNoPfXtvSRFhXilDhmmznURmVYOVLWx7UQzd63JnvYn78SEBbE4M4Ydpc10au70h+wsbebZvVXMTo7kthWZ4w7SJ0uOCuGedblkxIXy5I4KtpY0Tdhti2dtLmokMMBM2G6H7jpvZjyBAYaNx7UJk7dpZVpEJoWv9GH/dtMJwoICuGWFf/Z7T7QL8hPZU97Ku8WN3HtBrrfL8RkljZ38eU8V+UkR3L4qyyMvvEKDAvj0mhk8vr2cDfuq6e4f5OLZSZr24cN6+ofYV9nG8uxYQoMmZmdSd0UEO1meE8e2ErVqedv0XpYRkWmlrr2X5wqruXl5puYrj0iKCmFuahTvljTR0Tvg7XJ8Qv+gi2d2VxE7sjV8oAffwQhyOvjE6mwWZ8bw2uF6Xthfg7XaMtpXFVa1MuSyLM/27qr0e87PG94sZnORVqe9SWFaRKaNh98tZdBl+fTaHG+X4lMump1I74BLu/CNeOVQLc1d/Xxsafppe9snWoDDcOOyDM6bGc/m4ia2nWj2+H2Ke/ZWtJIYGUxajG/0KMeGBbEoI4btpc10q1XLa9TmITKJfKXVYTrq6R/iD9vKuXRu8hlnA09XGbFh5CVG8JtNJ7hrTQ4hgd59+9qbShu7eLe4idW58eQmeGYXytNxGMM1C1Jp6uzj+f01ZMWFaRa1j2nu6qesqZvLC5J9qhXnglmJ7KloZUtJE5fO1Tbj3qCVaRE3WGtp7urnRGMX+6vaeLe4kVcO1bLxeAPd/Vod8EXP7KmktXvA45u0+KsLZyfS0NHHn3ZXersUr+kfdPGn3ZXEhAVyxbzJDyUOY7hpWSbhQQE8tr2c3oGhSa9BzmxvRQsAizJ9a/JK8nutWsVN9A3qZ8YbtDItMkadfYM8s7uSI7UdH7jcYYa3EX7tcB3Ls+NYm5fg1jxamXgul+W3m04wLy2KVTN8o9fR1+QmhLMoM4Zfvl3CLcszp+Wkk9cO19HU1c/d588g2Omd1fnwYCe3rsjiN5tK+POeKm5dkelTq6DTlbWWvRWtzEgIJ9YHT/S7cFYih2va
2VnawtqRPmqZPArTMqnG0pPpi60Ox+s6eHpXJd0DQ1w6N5nMuFAigwOJCHESFhRAfXsfG483sO1EE1tLmpifHs36OUmaAeplbx9voLihix/evEjB5AyMMXzxopnc+8guntlTxc3LM71d0qQqb+5mc1EjK3PimJk4ee0dp5OTEM6lc5N55VAduYnhrJrh+S3m5ez2VbbR2NnPBfmJ3i7ltLLiwpiREM7m4kbWzIzX77lJpjAtMgoDQy5eOVjL5uImkiKD+dTaHFKjP9zPmBIdwk3LM7l8XgpbihrZXtrM0doOblyW4bWdsqay0b44e/FADUmRwdqY5BwuK0hmUUY0P371GNctSps2vdPvtXdEhQZy5XzvbORzqgtmJXKisYvnC2vIjFX/tLf9eXclTodhXprv/h5fmhXLn3ZXUt3aS3qsfl4mk8K0yDl09A7wuy2l1LT1sjo3nqvmp5xzVFZ0aCBXLUhlbX4Cf9haxmPby1k/N4mLZyfh0IrBpKpt72Xj8Ua+fsXsSZnM4M+MMfzTVXO4/dfbeOTdMu45y9zpqXQy7e+3lNLQ0cedq7N95gWEwxhuWp7Jz944zpM7KvjS+jyP7bbn7+8YetrAkIvnCmuYkxrl9dnSZzMnJRIDHKppU5ieZHpmETmLwSEXf9hWTmNnH3eel811i9LGNHM2KiSQz67LZUlmDK8frueJ7eX0D7o8WLGcaktRIyGBDm5fOf1CgDvWzEzgglmJPPBWEW09U3/udENHHz95/TizkiOYkxrl7XI+ICLYyUeXpNPQ2ce7xdoh0VveOdZAc1c/S3zsxMNThQc7yUkI51BNu7dLmXZGlQqMMVcaY44aY4qMMd88zfeDjTFPjnx/mzEmZ+TyeGPMm8aYTmPMz065zjJjzP6R6/zEqMFHfIy1lg37qilv7ubjSzOYk+LeE21ggIMbl2Vw1fwUDla388t3imnt7p/gauV0OvsG2VvRyseWZhCrk0FH7RtXzKa1e4Bfvl3s7VI87r9ePkLPwBDXLPDNFqDZKVHMTo7kjSP12lTHS57ZU0VsWCD5yd7tpR+NgtQo6tr7aOrs83Yp08o5w7QxJgB4ALgKKABuM8YUnHLY3UCLtTYP+BFw/8jlvcC3gK+d5qZ/AdwL5I98XOnOA5DJY62lf9CFa5rszrW1pImdZS1cNDuRhRnjW5EwxrAuP5E7z8uhuaufX20s0S+7SbDtRBODLstn1moc3ljMT4/m+sVp/HbzCerae71djscUVrbyx12VfHptDomRwd4u54yuWZDK4JDl1UN13i5l2mnvHeC1Q3V8ZFGax9psJlLByLsrWp2eXKP5yVgJFFlrS6y1/cATwPWnHHM98PuRz58G1htjjLW2y1q7ieFQ/T5jTCoQZa191w7vm/owcMN4Hoh4TnvPAO8ca+DHrx/nu88d5FvPHuB7fz3If750hJ+8fpwndpTz9rEGhlxTJ2RvKWrk+f01zE2JnNAh+LNTIvnsulz6B138emMJ9R1TN6h4W9/gEFtLmpmVHEFeku+vKPmaf7xsNkMuy49fO+7tUjzCWst3NxwkPjyIL63P93Y5Z5UQGcyamfHsKmuhqqXH2+VMKy/tr6Vv0MVHl6R7u5RRiQ0PIjU6hEPVCtOTaTRhOh2oOOnrypHLTnuMtXYQaAPONssnfeR2znabABhj7jXG7DTG7GxoaBhFuTIRXC7Lgao2fr+llPtfOsJLB2sJCwzg8oJkLpqdxJKsWHITw4kND6KovpO7frud8+9/g/9++SiljV3eLn9cypu6+cJju0mICOam5ZkTfsJgekwon12Xi8vCrzeeoLZNgdoT3jnWSFffIJfMTvJ2KX4pKz6MO1Zl89TOCoobOr1dzoR7dm8Vu8tb+cYVc4gKCfR2Oed08ZwkwoKdPFdYjZ0m7w76gj/triQnPozFPt4vfbKC1CjKm7vVFjSJRhOmT5ckTv2fPJpj3DreWvsra+1ya+3yxETfnO841QwOufjaH/fx
2PZyatp6uGBWIl+9dBafu3AmF81O4rKCZD6yMI0bl2XyydXZfPPKOfz8jqXMSYnk528VcdF/v8XfPbKL6lb/W0HpHRji3kd2Yi180oNn9qdEhXDPulwCDPx6Y4lWmyZYW88Am4oaWJAeTZa2DnfbfZfkEeJ08N8vH/V2KROqq2+Q/3jxCAszorlxWYa3yxmVkMAArihIpry5m32Vbd4uZ1ooa+pi24lmblruXxvnFKRFYeFDG4uJ54wmTFcCJ0/vzwCqz3SMMcYJRAPN57jNk3+Dne42xQv6Boe477E9PLOnivVzk/jGlXO4Yl4KCWfpJ3QGOLh6QSoPfXolW765nr9fn89bx+q59Idv86t3ihkY8p/pFf/zylGO1Hbw41sXEx/h2R7KxMhg7r1gJiGBDn6zqcTvV/R9yauH6nBZuGKeb8wM9lcJEcM/oy8eqOWF/TXeLmfCPPBmEXXtfXznI/NwOPwnJC3NjiU9JpSXDtRoKtAkeHpXJQ4DH1vqHy0e70mJCiE2LFCtHpNoNGF6B5BvjJlhjAkCbgU2nHLMBuCukc9vBN6wZ3kfylpbA3QYY1aPTPG4E/jLmKuXCdXTP8S9D+/ipYO1fOvaAtbPSR5zi0NKdAhfuWwWr37lQtbMjOf/vnCEj/x0EztLz/bayjdsK2niN5tOcMeqLC6epNaAuPAg7lmXS2RIIL/dfILDOmlk3Kpbe9hT3sKa3Hht5z4BPn/RTBZnxvBPTxdS3tTt7XLG7XBNO796p4SPLU1nWXast8sZE4cxfGRhKu29g7x9rN7b5UxpQy7L07squWBW4mk36PJlxhgKUqMobuikb2DI2+VMC+cM0yM90PcBLwOHgaestQeNMd8zxlw3ctiDQLwxpgj4KvD++DxjTCnwQ+BTxpjKkyaBfB74DVAEFAMvTsxDEnd09A5w10Pbeed4A//xsQXcff74ph9kxoXxm7tW8KtPLqOjd5Ab/9+7/PsLh332JMWuvkG+9vQ+MmPD+Jer507qfceEBfG5C3JJiQ7h0a1lfvHCw1dZa3nhQA0hgQFcpF7pCRHkdPDT25ZgDHzxsd30Dfrvk/PgkIt/+lMh0aGB/Os1pw6l8g9Z8eEszIhmU1GjemI9aFNRIzVtvdy8PPNWrFarAAAgAElEQVTcB/uggrRoBl2WY/VT73wHXzSqOS/W2hestbOstTOttT8Yuezb1toNI5/3WmtvstbmWWtXWmtLTrpujrU2zlobYa3NsNYeGrl8p7V2/sht3ne2lWzxrN6BIT7x4HZ2lbXw41sWc+sEbm5x+bwUXv3qBdyxKotfvlPCbzef8MkngB+8cJjKlh7+5+ZFhAdP/sag4cFO7j5/BnlJETyzp4oH3izSSUZuOFrbQUlDF+vnJvn0TmX+JjMujP+6aRH7q9r49xeOeLsctz20uZTCyja+e908v37X4rK5yQy5LG8e1eq0pzy1o4LYsEDWz/XPF+VZcWGEBQVwqFr99ZPB94cmisf96p0S9lW08tPblnD94onvDQsLcvKDjy7ghzcvorKlm5+9WURZk+/0B799rIHHtpVzz7pcVuTEea2OYGcAnzwvm4UZ0fzXy0f5t+cOMehH/ebeNuSyvHiglvjwIFbO8N6/41R1xbwUPrN2Br/bUspLB/yvf7q0sYv/efUol85N5tqFqd4uZ1ziI4JZnhPHjhMtNHdpA6iJ1tzVzyuHarlhSTrBTv98UR7gMMxNieJoXQeDLj2PeJrC9DRX0dzNA28Wcc2CVK5e4NknmI8tzeDvLpxJUICDX28sYXNRo9dXX9u6B/jG0/vIT4rgq5fN8motAE6Hg5uXZ74fWm751VYqmv2/T3Uy7ChtpqGzj6vmp/jF5gr+6JtXzWFRRjRff7rQr0KctZZvPlNIoMPB92+Y71eTGc7kktlJOBzw2mFt5DLR/rK3ioEhy03L/LPF4z0FaVH0Drg4oZPbPU7PONPc9/56iACH4V+vnZw+4dToUL5wUR6zU6J4
fn8Nz+6t9loftbWWb284QFNnPz+8ebHHxuCNlcMYvv2RAv731sUcq+3g6p9s5K+FGnZzNicau3hhfw25CeHMTXVv23c5tyCng5/dvhSAh98t9ZtdPJ/YUcHWkmb+5Zq5pESHeLucCREVGsh5ufHsq2jVrPoJZK3lyR0VLEiPpiDNv3+X5CVFEBhgNNVjEihMT2NvHqnn1UN1fOmS/Ek9Wzk0KIA7VmVx0axEdpQ288jWUq+ccfzHXZX8ZW81X16fz4KM6Em//3O5fnE6L/z9OvKSIrjvsT184+l9dPcPerssn1Pd2sPD75YSExbErSuzpsSqoy/LjAvjl58YPrH4gbeKOOLjE2hq23r5v88f5rzceG5d4d8rjae6YFYiwYEOXj1U6+1SpoyD1e0cqe3g5uX+MX/8bAIDHOQmRFDSoJVpT5v8M63EJ/QODPHd5w6Smxg+7skd7nAYw+XzUogJC2LDvip+vbGEO8/LISp0cnYiO17XwXf+cpA1M+P54sV5k3Kf7siMC+Opz53H/752nAfeKmJzURN/d2EuNy3PfH8l/bFt5aO6rdtXTdyJpTDco1xU30lRfSflzV109w/RO+Cib3D4T6fDkBkXSlZcONnxYfQNDk14/2FjZx8PbSklJDCAz6zNIcILJ49OR2vyEvjixXk8tq2Mh7eWcfHsJNbPTZrw3ULHq6tvkL97dBcDLhf//rEFU+6FVliQkwvyE3nlUB1lTV1ka4OicXtqZwVBTgfXLfKv2dJnkhMfxtG6Djr7BvX70YP0NztN/fqdEsqaunn07lUEOcf/BsVoA92pVs6IIzo0kMe3l/OLt4u5a00OKVGefRu2d2B4Y5qwoAB+fMtiAnx804bAAAdfu2I26/ITuP+lI3zrLwf539eL+Oy6GXxidbbH739gyEVjZx/1HX3Ut/fR0NFLfUcfTZ39DJ3S8x4U4CDY6SA40EH/oIv23g+upMeHBzEnJZI5qVFkx4eNq7e5pq2H324+gbWWz6zNJSbMf6cz+KO48CA+d+FMNuyt5s2j9VS1dnPjskyfecJ+bzfT/VVt/PyOpeQkTM2guWZmAluKm3jlUB2fPX/GlHvBMJl6B4Z4dk8VV85LITrM97eYH433fu7LmrqYl+Z778BOFb7xW08mVUXz8ESNaxakcn5+grfLYXZKJPdckMvDW0r55dvFfGJ1NjMTIzx2f//23CGO1nXw8GdWkuTh4D6RVuXG86fPr2HbiWYeeLOI/3jxCD9/s4g5KVHkJIQzIyGc6HGs7Lf1DFDa2MXx+k5eOlBLfUcvDR19NHf1815kNgyHqKTIYOamRnHdojTykyPISQgnIsj5od3k2roHKGvuoqypm+f2VVM6sj3v5uImgp0O8pMjKUiNZE5K1Jh61us7ernzwe309A/x2fNzSTzLDp3iOYEBDj62NJ3MuDCeK6zm/peOMDc1iuXZseQlRXhtpXpwyMWXH9/D5qIm/uemRVN6J8wgp4OLZyfyXGENx+s7mZUc6e2S/NbLB2tp7x3029nSp5MeE4rTYShtVJj2JIXpaegHzx/GYQz/55rJ3ZzkbNJjQvn8RTP53ZZSfre5lI+OcfvW0bYwPLevmse3l/P5i2ZywaxEd0r1KmMMq3PjWZ0bT2FlK798u4TXDtexfWSjl9iwQHLiw4kOCyTEGUBoYADBgcOrxc8X1tDZN0BH7yCdfYO0dPVT1dpDZUsPVS09dPT9bRU5wBjiI4JIjQllUWYMSZHBJEWGEB8RRGDA31aTP77s7H2F0WGBLAyLYWFGDB0jq9T9gy6KGzo5Ujvcm3igqo0AhyE/KYJ5adFcvSDltKvMQy7LxuMN/HFnJa8eqgMDd67OJj3Wv3Ynm2qMMaycEUdOQhg7TjSzp6KVA1VtRIcGsiQrhtzEcOanR0/airXLZfnGnwp55VAd3/1IwTl/RqeCFTPi2FzcxAv7a5iZGOHz77b5oiGX5RdvFZMTH8aamfHeLmfCOAMcZMSGUToF
di/1ZQrT00xTZx8vHazly5fkkRbjWyFkeCfAmfxhWxlP76qkpbufS2YnTdjblmVNXfzzM/tZlh3rE2PwxmthRgwP3LGUR94to7a9l9LGLkqbuiiq76Szb5BTZ6T8/t2yD3wdGewkPTaUjNhQVs2IIyM2jMy4MPKTI9hS1OSxJ+Qgp4O5qVHMTY3CZS0Vzd0crG7nQFUbR2o7eHZvFSlRIaTFhJAaHUrqyPSFDfuqqWnrJTYskDtWZ3H7yix2lLZ4pEYZu6TIEK5ZmMYV81M4UtPBzrJm3j7awFtHGzAG8hIjWJARzaKMGBZmRDM3dWzvRoyGtZbv/fUQz+yu4h8vm8Wn1k7++SDe4HQ4uHp+Ko9uK2PbiSbWzPT+O47+5tk9VRyp7eCnty350Dts/i4nIYx3jjV45LwVGaYwPc3sKG0hwGG4fZXne23dERoUwKfW5vDn3VW8frie1q4BbliSPu5gV9zQyZ0PbifAYfjJbUs+sLrq7wIchvSYUNJjQlmbN/wkaq2lf9BF76CLnoEhBgZd3LAknYgQJxHBTsKDAnCe5e9gW8nkbGnuMIbs+HCy48O5an4KVa09BAY4KG/uprq1h70Vrbx0oJdBl4sLZyXy7WsLuGRu0vtPCArTvsfpcDA/PZr56dF09g2SnxTBvspW9le28c6xRp7ZXTVynGF2SiQLM6JZOBKwh1zWrf/r1lq2FDfxo1ePsbOshXvWzeC+S3z3xGJPmJsaSV5iBK8drmNRRoxXdnL1V32DQ/zw1WMsSI/mGg/vt+ANOfHhvGUbKG/uJj9JbUCeoP9t08igy8Wu8hYumZPk07NWnQ4HNy7LICYsiDeP1tPU1c+NyzLc3v63sLKVTz20AwP84bOrSPexFXlPMMYQHBhAcGDA+33Us1N8+5eoMYaM2LAPtey4XJb+IZfPzAGX0YsIdnLxnCQunjO8JbO1lpq2Xgor2yisbGV/VRsv7K/l8e0VwHDATo0OIT02jIzY4ReIiZHBZ+293lLcyI9fPc720mZSokL4/g3zuWPV9BuRaIzhmoWp/PSN47x2uM4ju9lOVY9uLaeqtYf7P75wyq1Kw/DW4gYobVSY9hSF6WnkcE0HXX2D3L5yYkekeYIxhssKkkmICGLDvmp+8sZxrp6fyoqc2DE9SW463sjnHtlJbHgQj9y9ihlT9Ix+b3F3istYOByGEIeC9FRgjCEtJpS0mFCunD98UqC1lvLmbvZVtvHUjgoqW3rYXdbC1pImYLgtKC16uB0pyOmgb2CIwspWOvoGqRy5XnJUMP923TxuWZE5rV90JUeFsGpGPFtLmlg5I87b5fiF9t4BfvbGcdblJ/jECfmeEBIYQGpMCKVNmjftKQrT08iOE83EhAb61Yl3S7JiyUkI55ndlTy7t4qD1W18bGnGqKZWPF9Yw1ee3MuMhHAevnslyX40uUNkujAntfp0jpyk6rKWho4+qlp6qGztpqqlh60lTQy6LEFOB0UNnYQHO4kKCeQ7HyngtpVZ0zpEn2z93CT2VrTy18IavnrZrGm3Qj9Wv36nhJbuAf7pyjneLsWjcuLD2X6imUGXa1wjSeX0FKaniabOPooaOrl0bpLfnekdGxbEp9fOYPuJZl48UMP/vn6MlTlxzEyKICc+/AP9z02dfbx+uJ6XD9byxtF6lmXF8uBdK6bMzFCR6cBhDMlRISRHhbA0OxYYnrZgzPD3JnoDoqkkLMjJZQXJbNhXzUsHarlqCvYAT5T69l5+s/EE1y5MZX761B4blxMfzpbiJqpbesjS5j4TTmF6mthR2oLDwLJs/3zrzzEyEi4/KYK/FtawuaiJd4434nQYsuLCKG/uZnd5CztLm3HZ4VF7967L5R8unUVokFasRPydvy0CeNOKnDi2n2jm+88f5uI5SX6zaj845KK5u5/mruGPzt7B4bn2USEkeWCW/E/eOM7AkIuvXT57wm/b12THhwFQ2tStMO0BCtPTwHsnHs5OiRrXph6+ID4i
mLvW5NA3OERpYzfFDZ0UN3Ty/94uZk5KJPddks/lBcnMS4vS25siMi0FOAzXLkzlN5tO8H/+fID/vmmhT/4+7B0Y4t2SJv6yt4rj9Z20nLRBFAxvEnXy17/ZWMKizBg+sTqbNTPjx/WYDla38cT2Cm5bmTVld8c8WWRIIAkRQZQ2dXEB/tPq6S8UpqeB9048XJnjn6vSpxPsDGB2SuT7Eyo+vixd8zPljCbjREkRX5KbGMFXLp3Fj147RkFaFHef7xszt+vae3n7aAOvH6lj4/FGuvuHCAww5CVFsiQzhrjwIOLDg4iLCCYsKICWrn7qO/qoa+8lLCiAjccbefFALXNSIrn7/BlctzhtzL/73znWwBf+sJv4iCC+vD7fQ4/U9+TEh3Owuh2XtV7bnXSqUpieBt478TA/2XNbdHubgrSIyAd96ZI8DtW08YPnDzE7OXJCplWM9oXpe33tg0Mu9lS08uaRet462sChmnYAUqJC+OiSdC4tSKa8qfuMs//jI4KJjwhmbmoUt6/KondgiA17q3lw0wm+/nQh9790lE+uzuYTq7OIjzh3K8jj28v512cPkJ8UwUOfXkGiB9pHfFVOfDg7y1qob+/z6fG4/khheor724mHyXolKiIyjTgchv+5eTEf//kWvvjYbjbct5bsSeiX7egd4I87K3jrWAMbjzXQ3jtIgMOwLDuWb1w5m4tmJTE3NfL9No2xvHMUEhjAzSsyuWl5BpuLmnhwUwk/eu0YD7xVxEcXp3P3uhnMSv7wLGWXy3L/y0f45dslXDgrkQfuWDppW9z7ivfaWUqbuhSmJ9j0+kmahv524mGst0sREZFJFhHs5Nd3Lue6BzZxz8M7eeYLayc8RLqspaK5m6N1HRyr66C6tReApMhgrpyfwkWzk1iblzCh5+wYYzh/ZDZ0UX0nD20+wZ92V/LkzgrW5SewKCMGi8VacFk4XNPO28cauGNVFv923byz7gA7VcWGBRIV4qS0qYvVufHeLmdKUZiewqy1HKhuIy8pwu9PPBQR/6R+de/Lig/jgduXcudvt/MPT+zhx7cuGXeg7u4b5MhIeD5e10nPwBAOA5lxYVxekMzfX5pPQerknAielxTBDz66gK9dPpvHtpfz6NYyNhc1YozBMDwNKsjp4F+vmcvd58/wyZMxJ8N7M91LG7uw1k7bvwdPUJiewuo7+mju6mfdFN3VSURERmdtXgLfvraA72w4yCX//Rb/cvVcrl+cNqZA1dE7wMHqdg5Vt1PS2InLQmSwk7mpUcxKjiA/KfL9UaTz0iZ/bnNseBBfvDiPL16cN+n37S9yEsLZX9VGS/cAceFB3i5nylCYnsKOjJzoMSclysuViIiIt921JoeFGdF8d8NB/uHJvTy6tYzvXjfvjMcPulxUNPdQ3NBJUX0nFc3dWCAhIoh1+YnMS4siLSZU5+P4kZz35013KUxPIIXpKexwbQdpMSFq8RAREQCWZMXy5y+s5eldldz/0hE+8rNN5CVGEBoUQKDDQaDT4HQ4qO/opbSxm/4hFwZIiwnlkrlJzEuLJjkyWC0Cfio5KoRgp4OK5m6WZulcqomiMD1FdfYNUtHczSVzkrxdioiI+BCHw3DzikyumJ/CT18/zvP7a2jp7mdgyNI/6GLQ5SI6NIglWTHkJUWQmxChnWSnCIcxpMeEUtnS4+1SphSF6SnqaG0HFpiTqhYPERH5sOjQQP712gJyE6fuHgTyYRmxYWwuamRwyOXtUqaM6TcbZpo4UttOVIiTNM2SFBERkREZsaEMWUtNW6+3S5kytDI9BQ0MuThe18nirBj1tfkpjRMTERFPyIgNBaCypdvLlUwdCtNT0InGLvqHXMxN+fAuUCIi04lemIp8UHRoIBHBTvVNTyC1eUxBh2vaCQww6oMTERGRDzDGkBGrkxAnklampxhrLUdqO8hPiiRwGm6XKuKLtDoqMn5j+X90+6osD1bi/zJiQzla20FH7wCRIRqfO14K01NM
TVsvbT0DXDr3zCPx9MQuIiIyfWXEhmGB/VVtrJmpXZLHS0uXU8zh2nYMMFu7HoqIiMhpZMQMn4S4r6LNy5VMDQrTU8yRmg4y48KICNabDiIiIvJhYcFO4sKDKKxs9XYpU4IS1xTS3jNAVWsPlxcke7uUSTfa1hX10YnIeKlVTqaC9JhQ9lUoTE8ErUxPIUdrOwDteigiIiJnlxkbSnVbLw0dfd4uxe8pTE8hxxs6iQ4NJDky2NuliIiIiA9Ljw0DUKvHBFCbxxThspaShk5mJ0dq10MRcYvaF0Smj/SYUBwG9lW0sn7u9GsPnUgK01NEXXsv3f1DzNRGLeLnFOhERDwvyOlgVnIk+yo10WO8FKaniJKGLgByE8O9XIlvU1ATEREZtjAjmlcO1WGt1bva46AwPUWUNHQSHx5ETFiQt0sRERHxKk14Gp1FmTE8tbOSiuYesuLDvF2O31KYngIGh1yUNHaxMCPG26WID9OqvIiInGzRSG7YV9mqMD0OmuYxBRysbqdv0MVMtXiIiIjIKM1OiSTI6dC86XHSyvQUsKW4CYAZCQrTIiIi3uYv7wQGBjiYlxZFoU5CHBetTE8BW4obSY4KJjIk0NuliIiIiB9ZlBHD/qo2Bodc3i7FbylM+7n+QRc7SpvJTdBIPBERERmbRZnR9AwMUdTQ6e1S/JbCtJ/bW9FK74D6pUVERGTs3hteUFihVg93KUz7uS3FjRgDM7QyLSIiImM0Iz6cyBAne7WtuNsUpv3cluIm5qdFExoU4O1SRERExM84HIaFGdEUKky7TWHaj/X0D7G3vJU1M+O9XYqIiIj4qYUZMRyp6aB3YMjbpfglhWk/tqushf4hF+cpTIuIiIibFmXEMOiyHKpp93Ypfklh2o9tKW7E6TCsyInzdikiIiLipxZlRgNQqM1b3KIw7ce2FDexODOG8GDtvSMiIiLuSYkKISkymH3avMUtSmF+qr13gMLKVu67OM/bpYiIiHyAv+wAKMOMMSzMiGGfTkJ0i1am/dTO0mZcFlarX1pERETGaVFGNCUNXbT1DHi7FL+jMO2ntpY0ExTgYGlWrLdLERERET+3KHN485YDVWr1GCu1efiprSVNLM6KISRQ86VFZHpRC4HIxFuYMXwS4r7KVtbmJXi5Gv+ilWk/1NE7wIGqNlbP0BQPERERGb+YsCBy4sPYp4keYzaqMG2MudIYc9QYU2SM+eZpvh9sjHly5PvbjDE5J33vn0cuP2qMueKky0uNMfuNMXuNMTsn4sFMFzvLWob7pXPVLy0iIiITY2FGDIWa6DFm5wzTxpgA4AHgKqAAuM0YU3DKYXcDLdbaPOBHwP0j1y0AbgXmAVcCPx+5vfdcbK1dbK1dPu5HMo1sLWkiMMCwRP3SIiIiMkEWZkRT09ZLfXuvt0vxK6NZmV4JFFlrS6y1/cATwPWnHHM98PuRz58G1htjzMjlT1hr+6y1J4CikduTcdhW0syijBhCg9QvLSIiIhNj8chJiJo3PTajCdPpQMVJX1eOXHbaY6y1g0AbEH+O61rgFWPMLmPMvWMvfXrq7Btkf1WbWjxERERkQs1LiybAYSjUvOkxGc00D3Oay+wojznbdddaa6uNMUnAq8aYI9badz5058NB+16ArKysUZQ7te0qa2HIZVmVq5MPRURExkOTYT4oNCiAWcmR7NVJiGMympXpSiDzpK8zgOozHWOMcQLRQPPZrmutfe/PeuDPnKH9w1r7K2vtcmvt8sTExFGUO7VtLWnC6TAsy1a/tIiIiEysRRnR7K9qw9pT103lTEYTpncA+caYGcaYIIZPKNxwyjEbgLtGPr8ReMMO/ytsAG4dmfYxA8gHthtjwo0xkQDGmHDgcuDA+B/O1LetpImFGdGEBWlEuIiIiEyshRkxtHYPUN7c7e1S/MY5w/RID/R9wMvAYeApa+1BY8z3jDHXjRz2IBBvjCkCvgp8c+S6B4GngEPAS8AXrbVDQDKwyRizD9gOPG+t
fWliH9rU090/SGGl+qVFRETEMxZlDm/eolaP0RvV8qa19gXghVMu+/ZJn/cCN53huj8AfnDKZSXAorEWO93tKmth0GVZpTAtIiIiHjArOZJgp4PCyjauX3zqvAk5He2A6Ee2ljQRoH5pERER8ZDAAAfz06O1E+IYKEz7kW0lzSxIjyYiWP3SIiIi4hkLM6I5UN3G4JDL26X4BYVpP9HTP8S+ylaNxBMRERGPWpIVS++Ai8M1Hd4uxS8oTPuJ3eUtDAxZnXwoIiIiHrUiZ7iddHtps5cr8Q8K035iW0kTDgPL1S8tIiIiHpQaHUpGbCg7TihMj4bCtJ/YOtIvHRkS6O1SREREZIpbmRPHzrJmbd4yCgrTfqB3YIi9Fa0aiSciIiKTYnlOHI2d/Zxo7PJ2KT5PYdoPbD/RTP+Qi/NmKkyLiIiI562cMdxWukN90+ekMO0HNhU1EhTgYNUMTfIQERERz5uZGEFsWCA7Slu8XYrPU5j2AxuPN7IsO5awIM2XFhEREc8zxrA8J04r06OgMO3jGjr6OFzTzvn5Cd4uRURERKaRlTlxlDV1U9/e6+1SfJrCtI/bXNQIwDqFaREREZlEK0baS9XqcXYK0z5u4/FGYsMCmZcW7e1SREREZBqZlxZFaGCAWj3OQWHah1lr2Xi8gTV5CQQ4jLfLERERkWkkMMDBkqwYtmvzlrNSmPZhx+s7qe/oY12eWjxERERk8q3IieNIbTvtvQPeLsVnKUz7sI3Hh/uldfKhiIiIeMOKnDhcFnaXqW/6TBSmfdim4w3kJoSTERvm7VJERERkGlqSFUOAw6hv+iwUpn1U3+AQW0uatSotIiIiXhMe7GR+WpQmepyFwrSP2l3WSs/AEOerX1pERES8aHlOHHsrWukbHPJ2KT5JYdpHbSpqIMBhOG9mvLdLERERkWlsRU4c/YMu9le2ebsUn6Qw7aM2Hm9kSWYMkSGB3i5FREREprEVObGANm85E4VpH9TS1c/+qjb1S4uIiIjXxUcEk5sYrpMQz0Bh2gdtKW7CWm0hLiIiIr5hdW4820qa1Dd9GgrTPmhTUQORwU4WZcR4uxQRERERLp2bRFf/8KQx+SCFaR9jreXtow2cNzMeZ4D+eURERMT71sxMIDQwgNcO1Xm7FJ+jtOZj9lS0Ut3Wy5XzU7xdioiIiAgAIYEBrMtP4LXDdVhrvV2OT3F6uwD5oOcLawgKcHBpQbK3SxERET/22LZyb5cgU8ylBcm8cqiOg9XtzE+P9nY5PkMr0z7E5bK8sL+GC2YlEKWReCIiIuJDLpmThDHw2mG1epxMYdqH7Klooaatl2sWpnq7FBEREZEPSIgIZmlWrML0KRSmfcjzhbUEOR1cOlctHiIiIuJ7Lp2bzIGqdmraerxdis9QmPYR77V4XDgrUbseioiIiE+6rCAJgNcO13u5Et+hMO0jdpe3UNvey7Vq8RAREREfNTMxgpz4MI3IO4nCtI/4a2ENQU4H69XiISIiIj7KGMOlc5N5t7iJzr5Bb5fjExSmfYDLZXnxQA0Xz04kIljTCkVERMR3XVqQTP+Qi43HGrxdik9QmPYBu8pbqGvv4+oFavEQERER37Y8O5bo0EBe1VQPQGHaJzxfWEOwWjxERETEDzgDHFwyJ4k3j9QzOOTydjlepzDtZUMjUzwunp2kFg8RERHxC5fOTaale4Dd5a3eLsXrFKa9bOf/b+/Og6yszjyOf3/sNotAC8gi2CyiQGSNgUAiOsYgY4mTwYBx4hIsK4kZYWacaGYyE7WSSayhJpHEOOMWNWbQiEsIZhTjFg2isskygCJLB2h2wRakoeln/nhfxk5XY99uod974fepovq+55733Kf71Ol+OPfcc9bvYlt5hQ9qMTMzs4Lx+TNOoXlTMXfFlqxDyZyT6YzNfmszrZonb5eYmZmZFYK2rZpz/pmdmbVoIx8eOJR1OJlyMp2hnR9U8PiijVx8djdae4mHmZmZFZCvjS5h976DPLF4
Y9ahZMrJdIYemLeeisoqvn5u76xDMTMzM6uXc0o6Mqh7O+5/dR1VVZF1OJlxMp2R8v0HeXDeei4c0PwVfqMAAAxWSURBVIW+ndtmHY6ZmZlZvUji2jG9eXf7Xl4+gfecdjKdkZlvlPL+/kq+ObZv1qGYmZmZNcj4T3WlS7uW3PfquqxDyYyT6QxUVB7i3lfWMbpvMYNPa591OGZmZmYN0qJZE64cdTqvrtnBqi3vZx1OJpxMZ+DxhZvYVl7BN871rLSZmZkVtis+05NWzZtw/wk6O+1kupEdqgr+6w/vcnaPkxndtzjrcMzMzMw+kfZFLZg4vAdPLdnM9vKKrMNpdE6mG9nvlpWxYec+vjm2D5KyDsfMzMzsE7tmdAkHKqt4eP6GrENpdE6mG1FE8POX3qV3p9ZcOODUrMMxMzMzOyr6dGrD+Wd25uH5G9h/8MQ6xMXJdCN6fuU2Vpa9z9fP7UOTJp6VNjMzs+PHlDEl7Nx7gP9+vTTrUBqVk+lGsq18Pzc/sYy+ndtw6ZDuWYdjZmZmdlR9tk8x557RidufWcXqLeVZh9NonEw3gkNVwbRHlvBBxUHu/MowWjTzj93MzMyOL5KYftlg2rZqxg0zF58wyz2c1TWCGc+/w7x3d3LbhEH0P9WnHZqZmdnxqVPblky/bDCrt5bzb79bmXU4jcLJ9DH2xzU7mPHCO3xpWHcuG94j63DMzMzMjqmx/Ttz7ZgSHnptA3NXbMk6nGPOyfQxtK18P1MfWUKfTm34/qWDvBWemZmZnRD+cVx/BnZrx7cfX8qWPfuzDueYcjJ9jBw8VMXUmR+tky5q0SzrkMzMzMwaRctmTZlx+VAqDlYx7dHFVB6qyjqkY8bJ9DGwafeHTL57Pq+t9TppMzMzOzH16dSGWycMZP7aXXzlnteP2xlqJ9NH2dwVWxh/xyus3lLOjMuH8uURp2UdkpmZmVkmvjziNH48aTDLN+9h/IxXeGn1tqxDOuqcTB8lFZWHuGX2Cq775UJ6dixizt+O4ZLB3bIOy8zMzCxTfzW0B7O/NYbObVty9S/e5PZnVh1Xyz68kPcT2rX3AE8v3czD80tZvbWcr40u4aaL+tOyWdOsQzMzMzPLC307t+Gp60dz629XcNdL7zJvzQ6+Oup0vjiwC21bNc86vE8kp2Ra0jjgDqApcG9E/KjG8y2Bh4DhwE5gUkSsT5/7DjAFOATcEBHP5tJmPttbUckLq7bx1OJNvPz2diqrgv5d2nLPlSP4woAuWYdnZmZmlndaNW/KD790NiN7FzN97mpufOwt/vnJJlxwVhcmDOnGuf07FeRkZJ3JtKSmwJ3AF4CNwJuSZkfE/1arNgV4LyL6SpoM3A5MkjQAmAwMBLoBv5d0RnpPXW3mhXlrdrBgw3ts2LmP0l17Wb9zH9vLKwA4tV0rpnyuhEuHdOesru0yjtTMzMws/00Y0p1LBndjUelufrNkE3OWlvH0sjKaCLqefBK9iovo2bGI0zoWMbxXB0b2Ls465I+Vy8z0OcCaiFgLIOkRYAJQPfGdANySPp4F/EzJpsoTgEciogJYJ2lN2h45tJkXfru0jJlvlHJqu1b0LC7ivP6d6FXcmqE92zOypJgmTbx3tJmZmVl9SGJ4rw4M79WBf7l4AK++s4PFpe9Rumsfpbv28fuVW9nxwQEuP6fncZFMdwf+VO16I/CZI9WJiEpJe4DitHx+jXu7p4/rahMASdcB16WXH0hanUPMR90G4PUsXjh3pwA7sg7CcuK+Kgzup8Lgfioc7qs8c0XtxXnVTz9K/2WgV64Vc0mma5t6jRzrHKm8tl1EaraZFEbcDdz9cQEaSFoQESOyjsPq5r4qDO6nwuB+Khzuq8Lgfqq/XLbG2whU3yy5B7D5SHUkNQNOBnZ9zL25tGlmZmZmltdySabfBPpJKpHUguQDhbNr1JkNXJU+ngi8EBGRlk+W1FJSCdAPeCPHNs3MzMzM8lqdyzzSNdDfAp4l2cbu/ohY
Iek2YEFEzAbuA36ZfsBwF0lyTFrv1yQfLKwEro+IQwC1tXn0v70TipfCFA73VWFwPxUG91PhcF8VBvdTPSmZQDYzMzMzs/ryceJmZmZmZg3kZNrMzMzMrIGcTB8HJI2TtFrSGkk3Zx2PJSSdJulFSSslrZA0NS3vKOk5Se+kXztkHaslp71KWixpTnpdIun1tJ8eTT8sbRmT1F7SLEmr0rE1ymMq/0j6u/T33nJJMyW18pjKD5Lul7RN0vJqZbWOISVmpPnFUknDsos8fzmZLnDVjnu/CBgAXJ4e427ZqwT+ISLOAkYC16d9czPwfET0A55Pry17U4GV1a5vB36c9tN7wJRMorKa7gCeiYgzgcEkfeYxlUckdQduAEZExCCSjQYm4zGVLx4AxtUoO9IYuohkJ7Z+JAfo3dVIMRYUJ9OF7/+Pe4+IA8Dho9ktYxFRFhGL0sflJH/0u5P0z4NptQeBS7OJ0A6T1AP4S+De9FrA+cCstIr7KQ9Iagd8nmQHKSLiQETsxmMqHzUDTkrPnigCyvCYygsR8QeSndeqO9IYmgA8FIn5QHtJXRsn0sLhZLrw1Xbce/cj1LWMSDodGEpyKn2XiCiDJOEGOmcXmaV+AnwbqEqvi4HdEVGZXntc5YfewHbgF+mSnHsltcZjKq9ExCZgOlBKkkTvARbiMZXPjjSGnGPkwMl04cvluHfLkKQ2wOPAtIh4P+t47M9JuhjYFhELqxfXUtXjKnvNgGHAXRExFNiLl3TknXS97QSgBOgGtCZZLlCTx1T+8+/CHDiZLnw+mj2PSWpOkkj/KiKeSIu3Hn6bLP26Lav4DIDRwCWS1pMskzqfZKa6ffoWNXhc5YuNwMaIeD29nkWSXHtM5ZcLgHURsT0iDgJPAJ/FYyqfHWkMOcfIgZPpwuej2fNUuu72PmBlRPxHtadmA1elj68CftPYsdlHIuI7EdEjIk4nGT8vRMQVwIvAxLSa+ykPRMQW4E+S+qdFf0Fywq7HVH4pBUZKKkp/Dx7uJ4+p/HWkMTQbuDLd1WMksOfwchD7iE9APA5IGk8yk3b4aPYfZBySAZLGAK8Ay/hoLe4/kayb/jXQk+SPzmURUfPDIJYBSWOBGyPiYkm9SWaqOwKLgb+JiIos4zOQNITkg6ItgLXANSQTQx5TeUTSrcAkkl2NFgPXkqy19ZjKmKSZwFjgFGAr8D3gKWoZQ+l/hn5GsvvHPuCaiFiQRdz5zMm0mZmZmVkDeZmHmZmZmVkDOZk2MzMzM2sgJ9NmZmZmZg3kZNrMzMzMrIGcTJuZmZmZNZCTaTMzMzOzBnIybWZ2jEk6XdLyetS/WlK3HOo9IGliXfVq3DNW0pz63FPt3mmSihpyr5nZ8crJtJlZ/rkaqDOZzsA0oF7JtKSmxygWM7O84GTazKxxNJV0j6QVkuZKOknSEEnzJS2V9KSkDulM8wjgV5KWpPWGS3pZ0kJJz0rqmssLSvq0pHmS3pL0hqS2NZ6/RdKN1a6Xp7PorSU9nd63XNIkSTeQJPgvSnoxrX+hpNckLZL0mKQ2afl6Sf8q6VXgsqP08zMzy0tOps3MGkc/4M6IGAjsBv4aeAi4KSLOJjl2/nsRMQtYAFwREUNIjmP+KTAxIoYD9wM/qOvFJLUAHgWmRsRg4ALgwxxjHQdsjojBETEIeCYiZgCbgfMi4jxJpwDfBS6IiGFpzH9frY39ETEmIh7J8TXNzApSs6wDMDM7QayLiCXp44VAH6B9RLyclj0IPFbLff2BQcBzkgCaAmU5vF5/oCwi3gSIiPcB0jbqsgyYLul2YE5EvFJLnZHAAOCPaZstgNeqPf9oLi9kZlbonEybmTWOimqPDwHtc7xPwIqIGFXP1xMQddSp5M/foWwFEBFvSxoOjAd+KGluRNxWS/vPRcTlR2h7bz3jNTMrSF7mYWaWjT3Ae5I+l15/FTg8S10OHF7fvBroJGkUgKTmkgbm0P4qoJukT6f3tZVUcwJlPTAsfX4Y
UJI+7gbsi4iHgemH69SIaz4wWlLf9J4iSWfk8o2bmR1PPDNtZpadq4D/TLebWwtck5Y/kJZ/CIwCJgIzJJ1M8nv7J8CKj2s4Ig5ImgT8VNJJJOulL6hR7XHgSklLgDeBt9PyTwH/LqkKOAh8Iy2/G/gfSWXpuumrgZmSWqbPf7daG2ZmJwRF1PUuoJmZmZmZ1cbLPMzMzMzMGsjLPMzMCpykJ0nXO1dzU0Q8m0U8ZmYnEi/zMDMzMzNrIC/zMDMzMzNrICfTZmZmZmYN5GTazMzMzKyBnEybmZmZmTXQ/wGUxvk0lan6WQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x19d9a4952b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(12, 6))\n",
    "sns.distplot(df['hotel_cluster'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The data is fairly well spread over all 100 clusters, although the distribution shows some skewness."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Feature Engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this rebinds the name `datetime` from the module (imported in the first\n",
    "# cell) to the class; the helpers below rely on the class.\n",
    "from datetime import datetime\n",
    "\n",
    "\n",
    "def _parse_date(x):\n",
    "    \"\"\"Parse a date string, returning None for a missing value.\n",
    "\n",
    "    None and any float (NaN included, also numpy float subclasses such as\n",
    "    np.float64) count as missing. Strings are tried as '%Y-%m-%d' first and\n",
    "    then as '%Y-%m-%d %H:%M:%S'; a string matching neither format raises\n",
    "    ValueError.\n",
    "    \"\"\"\n",
    "    if x is None or isinstance(x, float):\n",
    "        return None\n",
    "    try:\n",
    "        return datetime.strptime(x, '%Y-%m-%d')\n",
    "    except ValueError:\n",
    "        # Only a format mismatch triggers the fallback; other errors propagate.\n",
    "        return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')\n",
    "\n",
    "\n",
    "def get_year(x, default=2013):\n",
    "    \"\"\"Return the year of date string `x`, or `default` when `x` is missing.\"\"\"\n",
    "    parsed = _parse_date(x)\n",
    "    return default if parsed is None else parsed.year\n",
    "\n",
    "\n",
    "def get_month(x, default=1):\n",
    "    \"\"\"Return the month (1-12) of date string `x`, or `default` when `x` is missing.\"\"\"\n",
    "    parsed = _parse_date(x)\n",
    "    return default if parsed is None else parsed.month\n",
    "\n",
    "\n",
    "def left_merge_dataset(left_dframe, right_dframe, merge_column):\n",
    "    \"\"\"Left-join `right_dframe` onto `left_dframe` on `merge_column`.\n",
    "\n",
    "    Every row of `left_dframe` is kept; rows without a match get NaN in the\n",
    "    columns that come from `right_dframe`.\n",
    "    \"\"\"\n",
    "    return pd.merge(left_dframe, right_dframe, on=merge_column, how='left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# msk = np.random.rand(len(df)) < 0.8\n",
    "# train = df[msk]\n",
    "# test = df[~msk]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the raw date_time string with numeric year / month features.\n",
    "# Missing values fall back to the helpers' defaults (year 2013, month 1).\n",
    "df['date_time_year'] = df['date_time'].apply(get_year)\n",
    "df['date_time_month'] = df['date_time'].apply(get_month)\n",
    "\n",
    "# Drop the raw column; as a consequence this cell cannot be re-run on the same df.\n",
    "del df['date_time']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive integer year / month features from srch_ci.\n",
    "# Missing values fall back to the helpers' defaults (year 2013, month 1).\n",
    "df['srch_ci_year'] = df['srch_ci'].apply(get_year)\n",
    "df['srch_ci_month'] = df['srch_ci'].apply(get_month)\n",
    "\n",
    "# The raw srch_ci column is dropped once the features exist.\n",
    "del df['srch_ci']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive integer year / month features from srch_co.\n",
    "# Missing values fall back to the helpers' defaults (year 2013, month 1).\n",
    "df['srch_co_year'] = df['srch_co'].apply(get_year)\n",
    "df['srch_co_month'] = df['srch_co'].apply(get_month)\n",
    "\n",
    "# The raw srch_co column is dropped once the features exist.\n",
    "del df['srch_co']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>site_name</th>\n",
       "      <th>posa_continent</th>\n",
       "      <th>user_location_country</th>\n",
       "      <th>user_location_region</th>\n",
       "      <th>user_location_city</th>\n",
       "      <th>orig_destination_distance</th>\n",
       "      <th>user_id</th>\n",
       "      <th>is_mobile</th>\n",
       "      <th>is_package</th>\n",
       "      <th>channel</th>\n",
       "      <th>...</th>\n",
       "      <th>hotel_continent</th>\n",
       "      <th>hotel_country</th>\n",
       "      <th>hotel_market</th>\n",
       "      <th>hotel_cluster</th>\n",
       "      <th>date_time_year</th>\n",
       "      <th>date_time_month</th>\n",
       "      <th>srch_ci_year</th>\n",
       "      <th>srch_ci_month</th>\n",
       "      <th>srch_co_year</th>\n",
       "      <th>srch_co_month</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>32352134</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>174</td>\n",
       "      <td>24103</td>\n",
       "      <td>2323.5232</td>\n",
       "      <td>802499</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>125</td>\n",
       "      <td>177</td>\n",
       "      <td>44</td>\n",
       "      <td>2014</td>\n",
       "      <td>5</td>\n",
       "      <td>2014</td>\n",
       "      <td>7</td>\n",
       "      <td>2014</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29796021</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>311</td>\n",
       "      <td>25538</td>\n",
       "      <td>2288.6121</td>\n",
       "      <td>85229</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>659</td>\n",
       "      <td>59</td>\n",
       "      <td>2013</td>\n",
       "      <td>6</td>\n",
       "      <td>2013</td>\n",
       "      <td>7</td>\n",
       "      <td>2013</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15185156</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>294</td>\n",
       "      <td>40046</td>\n",
       "      <td>587.6970</td>\n",
       "      <td>755217</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>642</td>\n",
       "      <td>22</td>\n",
       "      <td>2014</td>\n",
       "      <td>10</td>\n",
       "      <td>2014</td>\n",
       "      <td>12</td>\n",
       "      <td>2014</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3301948</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>332</td>\n",
       "      <td>55121</td>\n",
       "      <td>2234.4394</td>\n",
       "      <td>160733</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>47</td>\n",
       "      <td>1502</td>\n",
       "      <td>65</td>\n",
       "      <td>2014</td>\n",
       "      <td>8</td>\n",
       "      <td>2015</td>\n",
       "      <td>1</td>\n",
       "      <td>2015</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25429119</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>66</td>\n",
       "      <td>314</td>\n",
       "      <td>47869</td>\n",
       "      <td>839.0087</td>\n",
       "      <td>1078493</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>50</td>\n",
       "      <td>685</td>\n",
       "      <td>6</td>\n",
       "      <td>2014</td>\n",
       "      <td>3</td>\n",
       "      <td>2014</td>\n",
       "      <td>4</td>\n",
       "      <td>2014</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          site_name  posa_continent  user_location_country  \\\n",
       "32352134          2               3                     66   \n",
       "29796021          2               3                     66   \n",
       "15185156          2               3                     66   \n",
       "3301948           2               3                     66   \n",
       "25429119          2               3                     66   \n",
       "\n",
       "          user_location_region  user_location_city  orig_destination_distance  \\\n",
       "32352134                   174               24103                  2323.5232   \n",
       "29796021                   311               25538                  2288.6121   \n",
       "15185156                   294               40046                   587.6970   \n",
       "3301948                    332               55121                  2234.4394   \n",
       "25429119                   314               47869                   839.0087   \n",
       "\n",
       "          user_id  is_mobile  is_package  channel      ...        \\\n",
       "32352134   802499          0           1        9      ...         \n",
       "29796021    85229          0           0        9      ...         \n",
       "15185156   755217          0           1        9      ...         \n",
       "3301948    160733          0           1        9      ...         \n",
       "25429119  1078493          0           0        9      ...         \n",
       "\n",
       "          hotel_continent  hotel_country  hotel_market  hotel_cluster  \\\n",
       "32352134                4            125           177             44   \n",
       "29796021                2             50           659             59   \n",
       "15185156                2             50           642             22   \n",
       "3301948                 4             47          1502             65   \n",
       "25429119                2             50           685              6   \n",
       "\n",
       "          date_time_year  date_time_month  srch_ci_year  srch_ci_month  \\\n",
       "32352134            2014                5          2014              7   \n",
       "29796021            2013                6          2013              7   \n",
       "15185156            2014               10          2014             12   \n",
       "3301948             2014                8          2015              1   \n",
       "25429119            2014                3          2014              4   \n",
       "\n",
       "          srch_co_year  srch_co_month  \n",
       "32352134          2014              7  \n",
       "29796021          2013              7  \n",
       "15185156          2014             12  \n",
       "3301948           2015              1  \n",
       "25429119          2014              4  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Any correlations?\n",
    "We want to know if anything correlates well with hotel_cluster. This will tell us whether we should pay more attention to any particular columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "srch_destination_type_id    -0.036120\n",
       "site_name                   -0.027497\n",
       "hotel_country               -0.023837\n",
       "is_booking                  -0.022898\n",
       "user_location_country       -0.020239\n",
       "srch_destination_id         -0.016736\n",
       "srch_co_month               -0.005874\n",
       "srch_rm_cnt                 -0.005570\n",
       "srch_ci_month               -0.005015\n",
       "date_time_month             -0.002142\n",
       "channel                     -0.001386\n",
       "date_time_year              -0.000435\n",
       "cnt                          0.000378\n",
       "hotel_continent              0.000422\n",
       "user_location_city           0.001241\n",
       "user_id                      0.003891\n",
       "orig_destination_distance    0.006084\n",
       "user_location_region         0.006927\n",
       "srch_ci_year                 0.008562\n",
       "is_mobile                    0.008788\n",
       "srch_co_year                 0.009287\n",
       "posa_continent               0.012180\n",
       "srch_adults_cnt              0.012407\n",
       "srch_children_cnt            0.014901\n",
       "hotel_market                 0.022149\n",
       "is_package                   0.047598\n",
       "hotel_cluster                1.000000\n",
       "Name: hotel_cluster, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.corr()[\"hotel_cluster\"].sort_values()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "No column correlates linearly with hotel_cluster; this means that linear regression and logistic regression won't work well on our data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(241179, 27)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Known combinations of user location cities, origin-destination distances and search destinations will definitely help in finding the hotel cluster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every (destination, country, market, cluster) combination:\n",
    "# 'sum' adds up is_booking and 'count' is the number of rows in the group.\n",
    "group_keys = ['srch_destination_id', 'hotel_country', 'hotel_market', 'hotel_cluster']\n",
    "booking_stats = df.groupby(group_keys)['is_booking'].agg(['sum', 'count'])\n",
    "\n",
    "# With a single piece the concat / re-group is a no-op; it only matters when\n",
    "# `pieces` holds several partial aggregates sharing index entries.\n",
    "pieces = [booking_stats]\n",
    "agg = pd.concat(pieces).groupby(level=[0, 1, 2, 3]).sum()\n",
    "agg = agg.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>srch_destination_id</th>\n",
       "      <th>hotel_country</th>\n",
       "      <th>hotel_market</th>\n",
       "      <th>hotel_cluster</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">4</th>\n",
       "      <th rowspan=\"5\" valign=\"top\">7</th>\n",
       "      <th rowspan=\"5\" valign=\"top\">246</th>\n",
       "      <th>22</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                              sum  count\n",
       "srch_destination_id hotel_country hotel_market hotel_cluster            \n",
       "4                   7             246          22               0      1\n",
       "                                               29               0      1\n",
       "                                               30               0      1\n",
       "                                               32               1      2\n",
       "                                               43               0      1"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Blend the two statistics into one score: the is_booking sum weighs 0.85,\n",
    "# the plain row count 0.15.\n",
    "booking_weight, event_weight = 0.85, 0.15\n",
    "agg['sum_and_cnt'] = booking_weight * agg['sum'] + event_weight * agg['count']\n",
    "\n",
    "# Express every column as a share of its (destination, country, market) total.\n",
    "# transform('sum') broadcasts the group totals back onto the original index, so\n",
    "# the four index levels are preserved on every pandas version; groupby.apply\n",
    "# prepends the group keys again on pandas >= 2.0, which makes reset_index fail.\n",
    "group_totals = agg.groupby(level=[0, 1, 2]).transform('sum')\n",
    "\n",
    "# A group whose total is 0 yields NaN shares (0 / 0), as before.\n",
    "# NOTE: `agg` is rebound here, so this cell is not re-runnable without the previous one.\n",
    "agg = (agg.astype(float) / group_totals).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>srch_destination_id</th>\n",
       "      <th>hotel_country</th>\n",
       "      <th>hotel_market</th>\n",
       "      <th>hotel_cluster</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum_and_cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>22</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.125</td>\n",
       "      <td>0.073171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>29</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.125</td>\n",
       "      <td>0.073171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>30</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.125</td>\n",
       "      <td>0.073171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>32</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.250</td>\n",
       "      <td>0.560976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>43</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.125</td>\n",
       "      <td>0.073171</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   srch_destination_id  hotel_country  hotel_market  hotel_cluster  sum  \\\n",
       "0                    4              7           246             22  0.0   \n",
       "1                    4              7           246             29  0.0   \n",
       "2                    4              7           246             30  0.0   \n",
       "3                    4              7           246             32  1.0   \n",
       "4                    4              7           246             43  0.0   \n",
       "\n",
       "   count  sum_and_cnt  \n",
       "0  0.125     0.073171  \n",
       "1  0.125     0.073171  \n",
       "2  0.125     0.073171  \n",
       "3  0.250     0.560976  \n",
       "4  0.125     0.073171  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (destination, country, market) and one column per hotel_cluster\n",
    "# holding its sum_and_cnt value; clusters absent from a combination stay NaN.\n",
    "pivot_index = ['srch_destination_id', 'hotel_country', 'hotel_market']\n",
    "cluster_scores = agg.pivot_table(index=pivot_index, columns='hotel_cluster', values='sum_and_cnt')\n",
    "agg_pivot = cluster_scores.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>hotel_cluster</th>\n",
       "      <th>srch_destination_id</th>\n",
       "      <th>hotel_country</th>\n",
       "      <th>hotel_market</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>...</th>\n",
       "      <th>90</th>\n",
       "      <th>91</th>\n",
       "      <th>92</th>\n",
       "      <th>93</th>\n",
       "      <th>94</th>\n",
       "      <th>95</th>\n",
       "      <th>96</th>\n",
       "      <th>97</th>\n",
       "      <th>98</th>\n",
       "      <th>99</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>246</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>50</td>\n",
       "      <td>416</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.025210</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11</td>\n",
       "      <td>50</td>\n",
       "      <td>824</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>14</td>\n",
       "      <td>27</td>\n",
       "      <td>1434</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16</td>\n",
       "      <td>50</td>\n",
       "      <td>419</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.344828</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 103 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "hotel_cluster  srch_destination_id  hotel_country  hotel_market   0   1   2  \\\n",
       "0                                4              7           246 NaN NaN NaN   \n",
       "1                                8             50           416 NaN NaN NaN   \n",
       "2                               11             50           824 NaN NaN NaN   \n",
       "3                               14             27          1434 NaN NaN NaN   \n",
       "4                               16             50           419 NaN NaN NaN   \n",
       "\n",
       "hotel_cluster   3   4   5   6 ...  90        91  92  93  94  95  96  97  98  \\\n",
       "0             NaN NaN NaN NaN ... NaN       NaN NaN NaN NaN NaN NaN NaN NaN   \n",
       "1             NaN NaN NaN NaN ... NaN  0.025210 NaN NaN NaN NaN NaN NaN NaN   \n",
       "2             NaN NaN NaN NaN ... NaN       NaN NaN NaN NaN NaN NaN NaN NaN   \n",
       "3             NaN NaN NaN NaN ... NaN       NaN NaN NaN NaN NaN NaN NaN NaN   \n",
       "4             NaN NaN NaN NaN ... NaN  0.344828 NaN NaN NaN NaN NaN NaN NaN   \n",
       "\n",
       "hotel_cluster  99  \n",
       "0             NaN  \n",
       "1             NaN  \n",
       "2             NaN  \n",
       "3             NaN  \n",
       "4             NaN  \n",
       "\n",
       "[5 rows x 103 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agg_pivot.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.merge(df, dest, how='left', on='srch_destination_id')\n",
    "df = pd.merge(df, agg_pivot, how='left', on=['srch_destination_id','hotel_country','hotel_market'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.fillna(0, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(241179, 276)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We are only interested in booking events."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.loc[df['is_booking'] == 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df.drop(['user_id', 'hotel_cluster', 'is_booking'], axis=1)\n",
    "y = df.hotel_cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((20032, 273), (20032,))"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Forest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.24865023372782996"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = make_pipeline(preprocessing.StandardScaler(), RandomForestClassifier(n_estimators=273,max_depth=10,random_state=0))\n",
    "np.mean(cross_val_score(clf, X, y, cv=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3228727137315005"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import svm\n",
    "\n",
    "clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(decision_function_shape='ovo'))\n",
    "np.mean(cross_val_score(clf, X, y, cv=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Naive Bayes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10347912437041926"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.naive_bayes import GaussianNB\n",
    "\n",
    "clf = make_pipeline(preprocessing.StandardScaler(), GaussianNB(priors=None))\n",
    "np.mean(cross_val_score(clf, X, y, cv=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Multi-class Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.30445543572367767"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "clf = make_pipeline(preprocessing.StandardScaler(), LogisticRegression(multi_class='ovr'))\n",
    "np.mean(cross_val_score(clf, X, y, cv=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### KNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.25631461834732266"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "clf = make_pipeline(preprocessing.StandardScaler(), KNeighborsClassifier(n_neighbors=5))\n",
    "np.mean(cross_val_score(clf, X, y, cv=10, scoring='accuracy'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
